{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "code_folding": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>职位</th>\n",
       "      <th>薪水</th>\n",
       "      <th>时间</th>\n",
       "      <th>公司名称</th>\n",
       "      <th>地址</th>\n",
       "      <th>地区</th>\n",
       "      <th>经验</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>数据挖掘</td>\n",
       "      <td>面议</td>\n",
       "      <td>8小时前</td>\n",
       "      <td>广东省国际工程咨询有限公司</td>\n",
       "      <td>https://m.liepin.com/job/1925583723.shtml</td>\n",
       "      <td>广州-越秀区</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>数据挖掘</td>\n",
       "      <td>20-40k·13薪</td>\n",
       "      <td>2020-03-04</td>\n",
       "      <td>广州视睿电子科技有限公司</td>\n",
       "      <td>https://m.liepin.com/job/1926347943.shtml</td>\n",
       "      <td>广州</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>数据挖掘分析</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>一个月前</td>\n",
       "      <td>广东三头六臂信息科技有限公司</td>\n",
       "      <td>https://m.liepin.com/job/1921608071.shtml</td>\n",
       "      <td>广州-白云区</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>数据挖掘专家</td>\n",
       "      <td>15-30k·12薪</td>\n",
       "      <td>一个月前</td>\n",
       "      <td>广东优品智学教育科技有限公司</td>\n",
       "      <td>https://m.liepin.com/job/1923751349.shtml</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>数据挖掘岗</td>\n",
       "      <td>8-14k·12薪</td>\n",
       "      <td>2020-03-19</td>\n",
       "      <td>中国联通广东省分公司</td>\n",
       "      <td>https://m.liepin.com/job/1919408897.shtml</td>\n",
       "      <td>广州</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        职位          薪水          时间            公司名称  \\\n",
       "0    数据挖掘           面议        8小时前   广东省国际工程咨询有限公司   \n",
       "1    数据挖掘   20-40k·13薪  2020-03-04    广州视睿电子科技有限公司   \n",
       "2  数据挖掘分析   10-15k·12薪        一个月前  广东三头六臂信息科技有限公司   \n",
       "3  数据挖掘专家   15-30k·12薪        一个月前  广东优品智学教育科技有限公司   \n",
       "4   数据挖掘岗    8-14k·12薪  2020-03-19      中国联通广东省分公司   \n",
       "\n",
       "                                          地址      地区 经验  \n",
       "0  https://m.liepin.com/job/1925583723.shtml  广州-越秀区     \n",
       "1  https://m.liepin.com/job/1926347943.shtml      广州     \n",
       "2  https://m.liepin.com/job/1921608071.shtml  广州-白云区     \n",
       "3  https://m.liepin.com/job/1923751349.shtml  广州-天河区     \n",
       "4  https://m.liepin.com/job/1919408897.shtml      广州     "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from requests_html import HTMLSession\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "session =HTMLSession()\n",
    "url=\"https://m.liepin.com/zhaopin/?keyword=%E6%95%B0%E6%8D%AE%E6%8C%96%E6%8E%98&dqs=050020&salarylow=0&salaryhigh=999&industrys=000&compScale=000&compKind=000&pubtime=000&jobkind=&d_headId=25161ac74cdf1d84a56e404413737c63&d_ckId=25161ac74cdf1d84a56e404413737c63&d_sfrom=search_prime&d_curPage=0&d_pageSize=60&siTag=LiAE77uh7ygbLjiB5afMYg~OuMbIOh6plb8hBPDYvM6Hw\"\n",
    "headers={\n",
    "       \"User-Agent\": \"Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Mobile Safari/537.36\"\n",
    "}\n",
    "r =session.get(url,headers=headers,timeout=5)\n",
    "#news = ((\" \".join(r.html.xpath('//dd[\"@class=right-info\"]/ul/li[3]/text()'))).strip()).split()\n",
    "#new=(news.strip()).split()\n",
    "cf = pd.DataFrame({\n",
    "         \"职位\": r.html.xpath('//span[@class=\"name-text\"]/text()'),\n",
    "         \"薪水\": r.html.xpath('//li[@class=\"flexbox\"]/span/text()'),\n",
    "         \"时间\": r.html.xpath('//dd[\"@class=right-info\"]/ul/li[3]/time/text()'),\n",
    "         \"公司名称\": r.html.xpath('//dd[\"@class=right-info\"]/ul/li[2]/a/text()'),\n",
    "         \"地址\": r.html.xpath('//dd[\"@class=right-info\"]/ul/li[1]/a/@href'), \n",
    "         \"地区\": r.html.xpath('//dd[\"@class=right-info\"]/ul/li[3]/a/text()'),\n",
    "         \"经验\": r.html.xpath('//dd[\"@class=right-info\"]/ul/li[3]/text()')[0].strip(),\n",
    "         #\"地址\": r.html.xpath('//dd[\"@class=right-info\"]/ul/li[2]/a/@href'),  \n",
    "        })\n",
    "cf.head() \n",
    "\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      数据挖掘 \n",
       "1      数据挖掘 \n",
       "2    数据挖掘分析 \n",
       "3    数据挖掘专家 \n",
       "4     数据挖掘岗 \n",
       "Name: 职位, dtype: object"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "ef = pd.read_excel(\"猎聘网数据挖掘方面招聘信息.xlsx\", encoding=\"utf8\")\n",
    "display\n",
    "ef.head()[\"职位\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
